## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
pacman::p_load(
  # Data Manipulation & Cleaning
  tidyverse, janitor, fs, glue,

  # Data Visualization & Plotting
  ggthemes, ggridges, ggrepel, viridis, patchwork,

  # Statistical Modeling & Analysis
  brms, BradleyTerry2, icr,

  # Utilities & Performance
  remotes, tictoc
)

print(getwd())

## Colors for visualization
ger_color  <- c(
  "AfD" = "#00ADEF",
  "CDU/CSU" = "grey60",
  "FDP" = "#ebcc34",# "#ffed00",
  "Greens" = "#46962b",
  "SPD" = "#E2001A",
  "Left" = "#8B1A1A"
)
# knitr::purl("README.Rmd", output = "main_script.R")

if(!dir.exists("model")) dir_create("model")
if(!dir.exists("plot")) dir_create("plot")

## ----Loading artefacts for main result---------------------------------------------------------------------------------------------------------------------------------------------------
# Raw comparisons
raw_answers <- read_rds("input/0_respondents_rating.rds") %>%
  mutate(
    id1 = as.numeric(str_extract(mp_1, "\\d+")),
    id2 = as.numeric(str_extract(mp_2, "\\d+"))
  ) %>%
  glimpse


# Helpers for individual visulation of MPs
targets <- read_rds("input/targets.rds") %>%
  mutate(pageid = as.factor(pageid)) %>%
  glimpse


# Estimates for each individual MP
lambdas <- read_rds("data/0_mp_params.rds") %>%
  mutate(pageid = pageid %>%
           as.factor() %>%
           fct_reorder(mean) %>%
           fct_reorder(as.numeric(party))
  ) %>%
  glimpse

# Respondent specific biases
respondent_bias <- read_rds("data/0_respondent_bias.rds") %>%
  glimpse


## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
unique_comparisons <- raw_answers %>% filter(id1 > id2)
comparisons_count_by_mp <- raw_answers %>% count(mp_1, sort = T)
sum_stats <- summary(comparisons_count_by_mp$n)
respondent_counts <- raw_answers %>%
  summarise(n = n()/2,
            unique_mps = length(unique(mp_1)),
            .by = c(respondent, respondent_party)) %>%
  arrange(desc(n))
sum_stats_respondent = summary(respondent_counts$n)
sum_stats_respondent_unique_mps = summary(respondent_counts$unique_mps)


print(glue("Total number of comparisons: {nrow(unique_comparisons)}"))
print(glue("{comparisons_count_by_mp$mp_1[1]} (Angela Merkel) was compared {comparisons_count_by_mp$n[1]} times"))
print(glue("On average, each MP was compared {round(sum_stats[['Mean']], 2)} times (Median: {sum_stats[['Median']]})"))
print(glue("On average, each respondent compared {round(sum_stats_respondent[['Mean']], 2)} pairs of MPs (Median: {sum_stats_respondent[['Median']]})"))
print(glue("On average, each respondent compared {round(sum_stats_respondent_unique_mps[['Mean']], 2)} unique MPs (Median: {sum_stats_respondent_unique_mps[['Median']]})"))



## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
krippalpha_dt <- raw_answers %>%
  filter(outcome != 2) %>%
  mutate(
    id1 = as.numeric(str_extract(mp_1, "\\d+")),
    id2 = as.numeric(str_extract(mp_2, "\\d+"))
  ) %>%
  filter(id1 < id2) %>%
  mutate(id = paste(mp_1, mp_2)) %>%
  filter(n() > 2, .by = id) %>%
  select(id, respondent, outcome) %>%
  mutate(outcome = recode(outcome, "0" = -1, "1" = 1, "2" = 0))

print(glue("Krippendorff's alpha computed with {nrow(krippalpha_dt)} comparisons featuring {nrow(count(krippalpha_dt, id))} unique paris"))

krippalpha_results <- krippalpha_dt %>%
  pivot_wider(id_cols = id, names_from = respondent, values_from = outcome) %>%
  select(-id) %>%
  as.matrix() %>%
  t() %>%
  krippalpha(data = ., metric = "nominal")

print(glue("Krippendorff's alpha of {round(krippalpha_results[['alpha']], 2)}"))


## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
ccs_correlation <- cor(as.numeric(lambdas$ccs_lr), lambdas$mean, use = 'complete.obs')

print(glue("The faction, provided by Sältzer (2020), was available for {lambdas %>% filter(!is.na(faction)) %>% nrow()} MPs."))
print(glue("We were able to merge {lambdas %>% filter(!is.na(ccs_lr)) %>% nrow()} from the Comparative Candidate Survey based on respondents party, region and position on the list"))
print(glue("The correlation between our estimates and the Comparative Candidate Survey is {round(ccs_correlation, 3)}"))


## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
lambdas %>%
  ggplot(aes(x = mean, y = party, fill = party)) +
  geom_density_ridges(alpha = .7, color = "grey50", quantile_lines = T, quantiles = 2,
                      jittered_points = TRUE,
                      position = position_points_jitter(width = 0.05, height = 0),
                      point_shape = '|', point_size = 4, point_alpha = .9) +
  scale_fill_manual(values = ger_color) +
  guides(fill = "none") +
  labs(x = "Ideological Position", y = "") +
  theme_few()

ggsave(filename = "plot/2_ideological_distribution_of_parties.png", width = 8, height = 5)


## ----fig.width=12, fig.height=6----------------------------------------------------------------------------------------------------------------------------------------------------------
label_dt <- lambdas %>%
  inner_join(targets) %>%
  mutate(
    nudge = case_when(
      # Ralph Brinkhaus, Thomas L. Kemmerich, Wolfgang Kubicki, Katja Suding, Cem Özdemir, Anton Hofreiter, Claudia Roth
      pageid %in% as.numeric(c("636", "699", "399", "404", "372", "427", "406", "431")) ~ 7,
      #Philipp Amthor, Carsten Linnemann, Helge Braun, Heiko Maas, Saskia Esken, Aydan Özoğuz, Jan Korte, Bernd Riexinger, Jürgen Trittin
      pageid %in% as.numeric(c("209", "210", "11", "20", "555", "605", "508", "284", "268", "441")) ~ -7,
      TRUE ~ 7*nudge
    )
  )

lambdas %>%
  ggplot(aes(x = pageid, y = mean, color = party)) +
  geom_point(size = .5, show.legend = T) +
  scale_color_manual(values = ger_color) +
  labs(y = "Ideological Position", x = "") +
  geom_text_repel(data = label_dt, aes(label = name), nudge_y = label_dt$nudge, max.iter = 3000, size = 3, force = 10) +
  geom_errorbar(aes(ymin = low, ymax = high), size = .2, alpha = .25, show.legend = F) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.border = element_blank(),
        legend.position = "none")


ggsave(filename = "plot/3_individual_positions.png", width = 12, height = 6)


## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
lambdas %>%
  filter(party %in% c("Greens", "SPD", "CDU/CSU", "Left", "AfD")) %>%
  mutate(faction = case_when(party == "Greens" & faction == "FUNDI" ~ "Greens - Fundamentalist",
                             party == "Greens" & faction == "REAL" ~ "Greens - Realist",
                             party == "SPD" & faction == "SEEH" ~ "SPD - Seeheim Circle",
                             party == "SPD" ~ "SPD - Rest of the Party",
                             party == "CDU/CSU" &  region == "Bayern" ~ "CSU",
                             party == "CDU/CSU" ~ "CDU",
                             T ~ NA_character_)) %>%
  filter(!is.na(faction)) %>%
  mutate(faction = fct_reorder(as.character(faction), mean)) %>%
  ggplot(aes(x = mean, y = faction, fill = party)) + geom_boxplot(alpha = .5, show.legend = F) +
  scale_fill_manual(values = ger_color) +
  theme_bw() +
  labs(y = "", x = "Ideological Position")

ggsave(filename = "plot/4_comparisons_between_factions.png", width = 8, height = 5)



## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
set.seed(888)
ccs_correlation <- cor(as.numeric(lambdas$ccs_lr), lambdas$mean, use = 'complete.obs')

lambdas %>%
  filter(!is.na(party) & !is.na(ccs_lr)) %>%
  mutate(mean = 10*(mean - min(mean))/(max(mean) - min(mean))) %>%
  ggplot(aes(x = mean, y = ccs_lr, color = party)) +
  geom_jitter(height = 0.25) +
  scale_color_manual(values = ger_color) +
  theme_bw() +
  labs(subtitle = glue("Correlation: {round(ccs_correlation, 3)}"),
       x = "Ideological Position (our estimate)",
       y = "MPs' self-estimate (CCS)",
       color = "") +
  theme(plot.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA))

ggsave(filename = "plot/5_comparison_with_ccs.png", width = 8, height = 5)



## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
custom_boxplot_stats_90 <- function(x) {
  r <- quantile(x, probs = c(0.05, 0.25, 0.5, 0.75, 0.95))
  names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
  return(r)
}

# Function to compute 95% interval statistics for the boxplot
custom_boxplot_stats_95 <- function(x) {
  r <- quantile(x, probs = c(0.025, 0.25, 0.5, 0.75, 0.975))
  names(r) <- c("ymin", "lower", "middle", "upper", "ymax")
  return(r)
}

respondent_bias %>%
  mutate(user = paste("Resp ", as.character(as.numeric(as.factor(respondent)))) %>% fct_reorder(mean)) %>%
  ggplot(aes(x = mean, y = user)) +
  stat_summary(fun.data = custom_boxplot_stats_95, geom = "boxplot", fatten = 2, width = 0.6, lwd = 0.2, color = "blue", alpha = 0) +
  stat_summary(fun.data = custom_boxplot_stats_90, geom = "boxplot", fatten = 2, width = 0.6, alpha = 0.6) +
  theme_minimal() +
  coord_cartesian(xlim = c(-1.5, 1.5)) +
  labs(x = "Estimated respondent specific bias", y = "Respondents\n") +
  theme(plot.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA))

ggsave(filename = "plot/6_respondent_bias.png", width = 8, height = 5)
